;;   This file is part of scheme-GNUnet, a partial Scheme port of GNUnet.
;;   Copyright (C) 2006, 2007, 2008, 2009, 2013, 2020, 2021 GNUnet e.V.
;;
;;   scheme-GNUnet is free software: you can redistribute it and/or modify it
;;   under the terms of the GNU Affero General Public License as published
;;   by the Free Software Foundation, either version 3 of the License,
;;   or (at your option) any later version.
;;
;;   scheme-GNUnet is distributed in the hope that it will be useful, but
;;   WITHOUT ANY WARRANTY; without even the implied warranty of
;;   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;;   Affero General Public License for more details.
;;
;;   You should have received a copy of the GNU Affero General Public License
;;   along with this program.  If not, see <http://www.gnu.org/licenses/>.
;;
;;   SPDX-License-Identifier: AGPL-3.0-or-later

;; Author: Christian Grothoff (upstream, C)
;; Author: Maxime Devos (downstream, Scheme)
;; Brief: parse GNUnet configuration files.

;; TODO: unquoting
(define-library (gnu gnunet config parser)
  (export parse-line ;; line parser
	  <position:%> make-%-position %-position?
	  position:%
	  <position:#> make-#-position #{#-position?}#
	  position:#
	  <position:=> make-=-position =-position?
	  position:variable-start position:variable-end
	  position:= position:value-start position:value-end
	  #{<position:[]>}# #{make-[]-position}# #{[]-position?}#
	  position:section-name-start position:section-name-end
	  <position:@inline@> make-@inline@-position @inline@-position?
	  position:@inline@-start position:@inline@-end
	  position:@inline@-filename-start position:@inline@-filename-end

	  ;; expansion parser (data types)
	  <expo:literal> make-literal-position literal-position?
	  <expo:$> make-$-position $-position?
	  #{<expo:${}>}# #{make-${}-position}# #{${}-position?}#
	  #{<expo:${:-}>}# #{make-${:-}-position}# #{${:-}-position?}#

	  expo:literal-start expo:literal-end
	  expo:$-name-start expo:$-name-end
	  #{expo:${}-name-start}# #{expo:${}-name-end}#
	  #{expo:${:-}-name-start}# #{expo:${:-}-name-end}#
	  #{expo:${:-}-value-start}# #{expo:${:-}-value-end}#
	  #{expo:${:-}-value-parts}#

	  ;; expansion parser (conditions)
	  &expansion-violation &empty-variable-violation &missing-close
	  make-empty-variable-violation make-missing-close-violation
	  expansion-violation? empty-variable-violation? missing-close-violation?
	  expansion-violation-position empty-variable-kind missing-close-kind

	  parse-expandable* parse-expandable)
  (import (only (guile)
		eval-when quote char-set
		char-set:whitespace
		string-index
		string-skip string-skip-right string-prefix?)
	  (only (rnrs base)
		begin define lambda define-syntax syntax-rules ...
		assert or + - if char=? not and exact? integer?
		< <= = cons values reverse pair? null?
		string-length string-ref)
	  (only (rnrs control)
		unless)
	  (only (rnrs exceptions)
		raise)
	  (only (rnrs records syntactic)
		define-record-type)
	  (only (rnrs conditions)
		define-condition-type
		&lexical)
	  (only (rnrs lists) memq)
	  (only (gnu gnunet utils hat-let)
		let^))
  (begin
    
    ;; * The position-preserving line parser
    ;;
    ;; This parser operates on a per-line basis without any state.
    ;; It does not directly return configuration values.  Rather,
    ;; it returns the start and end positions.

    ;; Divergence from upstream GNUnet:
    ;; upstream only recognises #\newline, #\return and #\tab,
    ;; while this includes other Unicode whitespace as well.
    ;; Maybe we shouldn't.
    (define whitespace char-set:whitespace)

    ;; The output record types of @code{parse-line}.

    ;; Only defining this at expansion time halves the number
    ;; of output lines of "guild disassemble".
    (eval-when (expand)
      (define-syntax exact-integers?
	(syntax-rules ()
	  ((_ x ...)
	   (and (and (integer? x) (exact? x))
		...))))
      (define-syntax define-positions-type
        (syntax-rules ()
	  ((_ (<positions:type> make-type-positions type-positions?)
	      ((ascending-position-field accessor) ...)
	      (additional-restriction ...)
	      docstring)
	   (define-record-type
	       (<positions:type> make-type-positions type-positions?)
	     (fields (immutable ascending-position-field accessor) ...)
	     (opaque #t)
	     (sealed #t)
	     (protocol
	      (lambda (%make)
		docstring
		(lambda (ascending-position-field ...)
		  (assert (and (exact-integers? ascending-position-field ...)
			       (<= 0 ascending-position-field ...)
			       additional-restriction ...))
		  (%make ascending-position-field ...)))))))))

    (define-positions-type (<position:%> make-%-position %-position?)
      ((% position:%))
      ()
      "@var{%} is the position of the @code{#\\%} comment character in
a comment.")

    (define-positions-type (<position:#> make-#-position #{#-position?}#)
      ((#{#}# position:#))
      ()
      "@var{#} is the position of the @code{#\\#} comment character in
a comment.")

    (define-positions-type (<position:=> make-=-position =-position?)
      ((variable-start position:variable-start)
       (variable-end position:variable-end)
       (= position:=)
       (value-start position:value-start)
       (value-end position:value-end))
      ;; TODO: should empty variable names be allowed?
      ((< variable-start variable-end)
       (<= = value-start))
      "@var{variable-start} (inclusive) and @var{variable-end} (exclusive) are
the start and end positions of the variable name in an assignment.  @var{=} is
the position of the equality sign.  @var{value-start} (inclusive) and
@var{value-end} (exclusive) are the start and end positions of the value.

If the value is empty, then by convention @var{variable-start} and
@var{variable-end} are the positions right after the equality sign.")

    (define-positions-type
      (#{<position:[]>}# #{make-[]-position}# #{[]-position?}#)
      ((section-name-start position:section-name-start)
       (section-name-end position:section-name-end))
      ;; TODO: should empty section names be allowed?
      ;; Also, maybe impose some restrictions on names?
      ;; (Likewise for variable names)
      ()
      "@var{section-name-start} (inclusive) and @var{section-name-end}
(exclusive) are the start and end positions of a section name.")

    (define-positions-type (<position:@inline@> make-@inline@-position
						@inline@-position?)
      ((@inline@-start position:@inline@-start)
       (@inline@-end position:@inline@-end))
      ;; TODO: should empty file names be allowed?
      ;; If so, change < to <=.
      ((< (string-length "@INLINE@ ") (- @inline@-end @inline@-start)))
      "@var{@inline@-start} (inclusive) and @var{@inline@-end} (exclusive)
are the start and end positions of an inclusion directive.")

    (define (position:@inline@-filename-start position)
      "The start position (inclusive) of the file name of the inclusion
directive described by @var{filename}."
      (+ (position:@inline@-start position)
	 (string-length "@INLINE@ ")))

    ;; The end position (exclusive) of the file name.
    (define position:@inline@-filename-end position:@inline@-end)

    (define (parse-line line)
      "Parse a single line @var{line} (without the end of line characters)
from a GNUnet configuration file, into one of its possible types.

@begin itemize
@item The boolean @code{#false} if @var{line} is not recognised.
@item The boolean @code{#true} if @var{line} is an empty line.
@item A @code{<position:%>} or @code{<position:#>} for comment lines
 started with @code{#\\%} and @code{#\\#} respectively.
@item A @code{<position:=>} for variable assignements.
@item A @code{<position:[]>} for section names.
@item A @code{<position:@inline@>} for inclusion directives.
@end itemize

Other syntax may be supported in the future, in which case other data
of other types may be returned."
      ;; Ignore leading whitespace.
      (let^ ((! start-inclusive (string-skip line whitespace))
	     ;; Did the line consist of only whitespace?
	     ;; Then stop.
	     (? (not start-inclusive) #true)
	     (! first-important-character
		(string-ref line start-inclusive))
	     ;; Is this a comment?  Then stop.
	     (? (char=? first-important-character #\#)
		(make-#-position start-inclusive))
	     (? (char=? first-important-character #\%)
		(make-%-position start-inclusive))
	     ;; Ignore trailing whitespace.
	     (! end-inclusive
		(string-skip-right line whitespace start-inclusive))
	     (!! end-inclusive)
	     ;; Is this a section name?  Then stop.
	     (? (and (char=? #\[ first-important-character)
		     (char=? #\] (string-ref line end-inclusive)))
		(#{make-[]-position}# (+ 1 start-inclusive) end-inclusive))
	     ;; Is this an inclusion directive?  Then stop.
	     ;; TODO upstream GNUnet compares case-insensitively.
	     ;; Is this a bug or a feature?
	     (? (and (char=? #\@ first-important-character)
		     (string-prefix? "@INLINE@ " line 1
				     (string-length "@INLINE@ ")
				     ;; XXX what if the file name is empty?
				     (+ 1 start-inclusive) (+ 1 end-inclusive)))
		(make-@inline@-position start-inclusive
					(+ 1 end-inclusive)))
	     ;; Maybe this is an assignment; search for the equality
	     ;; sign.
	     (! =-position (string-index line #\= start-inclusive
					 (+ 1 end-inclusive)))
	     ;; no clue!
	     (? (not =-position) #f)
	     ;; Remove trailing whitespace from the variable name
	     ;; (the ‘tag’).
	     (! variable-end-inclusive (string-skip-right line whitespace
							  start-inclusive
							  =-position))
	     ;; TODO should empty tags by allowed?
	     ;; Bail out if the variable name consists of only whitespace.
	     (? (not variable-end-inclusive) #f)
	     (! variable-end (+ 1 variable-end-inclusive))
	     ;; Remove whitespace from the variable value.
	     (! value-start (string-skip line whitespace (+ 1 =-position)
					 (+ 1 end-inclusive)))
	     (! value-start (or value-start (+ 1 end-inclusive))))
	    (make-=-position start-inclusive variable-end
			     =-position value-start (+ 1 end-inclusive))))

    
    ;; * The (recursive) position-preserving variable substitutions parser.
    ;; We support: "literal-stuff", "${var}" "$var", "${VAR:-stuff}".
    ;; First define some data types.

    (define-positions-type (<expo:literal> make-literal-position literal-position?)
      ((literal-start expo:literal-start)
       (literal-end expo:literal-end))
      ((< literal-start literal-end))
      "@var{literal-start} (inclusive) and @var{literal-end} (exclusive) are
the start and end positions of a region of texts without expansions.")

    (define-positions-type (<expo:$> make-$-position $-position?)
      (($-name-start expo:$-name-start)
       ($-name-end expo:$-name-end))
      ((< $-name-start $-name-end))
      "@var{$-name-start} (inclusive) and @var{$-name-end} (exclusive) are the
start and end positions of a variable name in an expansion X/$VAR/etcetera.")

    (define-positions-type (#{<expo:${}>}# #{make-${}-position}#
			    #{${}-position?}#)
      ((#{${}-name-start}# #{expo:${}-name-start}#)
       (#{${}-name-end}# #{expo:${}-name-end}#))
      ((< #{${}-name-start}# #{${}-name-end}#))
      "@var{$@{@}-name-start} (inclusive) and @var{$@{@}-name-end}
(exclusive) are the start and end positions of a variable name in an expansion
${VAR}.")

    (define-record-type (#{<expo:${:-}>}# #{make-${:-}-position}#
			 #{${:-}-position?}#)
      (fields (immutable #{${:-}-name-start}# #{expo:${:-}-name-start}#)
	      (immutable #{${:-}-name-end}# #{expo:${:-}-name-end}#)
	      (immutable #{${:-}-value-start}# #{expo:${:-}-value-start}#)
	      (immutable #{${:-}-value-end}# #{expo:${:-}-value-end}#)
	      (immutable #{${:-}-value-parts}# #{expo:${:-}-value-parts}#))
      (sealed #t)
      (opaque #t)
      (protocol
       (lambda (%make)
	 (lambda (#{${:-}-name-start}# #{${:-}-name-end}#
		  #{${:-}-value-start}# #{${:-}-value-end}#
		  #{${:-}-value-parts}#)
	   "@var{$@{:-@}-name-start} (inclusive) and @var{$@{:-@}-name-end}
(exclusive) are the start and end positions of a variable name in an expansion
@samp{$@{VAR:-DEFAULT-VALUE@}}.  @var{$@{:-@}-value-start} (inclusive) and
@var{$@{:-@}-value-end} (exclusive) are the start and end positions of
DEFAULT-VALUE.  @var{${:-}-value-parts} is an ordered list of contiguous
expansion position objects, representing the structure of @samp{DEFAULT-VALUE}
(unverified)."
	   (assert (and (exact-integers?
			 #{${:-}-name-start}# #{${:-}-name-end}#
			 #{${:-}-value-start}# #{${:-}-value-end}#)
			(<= 0 #{${:-}-name-start}#)
			(< #{${:-}-name-start}# #{${:-}-name-end}#)
			(= (- #{${:-}-value-start}# #{${:-}-name-end}#) 2)
			(<= #{${:-}-value-start}# #{${:-}-value-end}#)
			(or (pair? #{${:-}-value-parts}#)
			    (null? #{${:-}-value-parts}#))))
	   (%make #{${:-}-name-start}# #{${:-}-name-end}#
		  #{${:-}-value-start}# #{${:-}-value-end}#
		  #{${:-}-value-parts}#)))))

    ;; Now define the possible syntax errors.
    (define-condition-type &expansion-violation &lexical
      %make-expansion-violation expansion-violation?
      (position expansion-violation-position))

    (define (make-expansion-violation position)
      (assert (and (exact-integers? position) (<= 0 position)))
      (%make-expansion-violation position))

    (define-condition-type &empty-variable-violation &expansion-violation
      %make-empty-variable-violation empty-variable-violation?
      ;; $, ${} or ${:-}
      (kind empty-variable-kind))

    (define (make-empty-variable-violation position kind)
      "Make a condition indicating at position @var{position} a variable
name was expected, but only an empty string was found.  The symbol @var{kind}
indicates the type of variable expansion found: @code{$@{:-@}} for variable
expansions with a default, @code{$@{@}} for braced variable expansions without
default and @code{$} for unbraced variable expansions."
      (assert (and (exact-integers? position)
		   (<= 0 position)
		   (memq kind '($ #{${}}# #{${:-}}#))))
      (%make-empty-variable-violation position kind))

    (define-condition-type &missing-close &expansion-violation
      %make-missing-close-violation missing-close-violation?
      ;; ${} or ${:-}
      (kind missing-close-kind))

    (define (make-missing-close-violation position kind)
      "Make a condition indicating at position @var{position} a closing
brace (@code{#\\@}) was expected, but not found.  The symbol @var{kind}
indicates the type of variable expansion found, as in
@code{empty-variable-violation}, though it cannot be @code{$@}."
      (assert (and (exact-integers? position)
		   (<= 0 position)
		   (memq kind '(#{${}}# #{${:-}}#))))
      (%make-missing-close-violation position kind))

    (define cs::-or-close (char-set #\: #\}))
    (define cs:$-or-close (char-set #\$ #\}))
    ;; TODO: should #\0 be included?  It seems to be
    ;; ‘merely’ an artifact of the C implementation.
    ;; TODO: add #\{?
    (define cs:unbraced-end/nested (char-set #\/ #\\ #\0 #\ #\}))
    ;; TODO add #\{, #\} here, I guess? For consistency with bash.
    (define cs:unbraced-end (char-set #\/ #\\ #\0 #\ ))

    (define (parse-expandable* text start end nested?)
      "Search in @var{text} for variable references to expand, returning
a list of expansible position objects and the end position (exclusive,
does not include closing brace).

Alternatively, raise an @code{&expansion-violation}.  If @var{nested?}
is trueish, stop at (and expect) an unbalanced close brace.
If @var{nested?} is Scheme-trueish, it is used as the ‘kind’ argument for
@code{&expansion-violation}.

(In the current parser, in practice this will be @code{#f} or @code{@{:-@}},
but perhaps the syntax will be extended in the future.)

TODO: there currently is not a dedicated condition type for ${a:} and ${a:+}
(in the first, a - after the : is missing, and in the second, + is
invalid).

If @var{nested?} is Scheme-falsish, then the second return value is simply
@var{end} itself."
      (assert (and (exact-integers? start end)
		   (<= 0 start)
		   (<= start end)
		   (<= end (string-length text))))
      (let^ ((/o/ loop
		  ;; in reverse chronological order
		  (accumulated '())
		  ;; where to start searching for the next expansion object
		  (start start))
	     ;; Search for a $ to expand (or a closing brace to stop at,
	     ;; when nested/recursing).
	     (! dollar-close (string-index text (if nested?
						    cs:$-or-close
						    #\$) start end))
	     ;; Add the literal region of text to @var{accumulated}
	     ;; (unless it is empty).
	     (! accumulated
		(if (or (= start (or dollar-close end)))
		    accumulated
		    (cons (make-literal-position start (or dollar-close end))
			  accumulated)))
	     ;; No #\$ and we're not nested/recursing?
	     ;; Then we're done.
	     (? (and (not nested?) (not dollar-close))
		(values (reverse accumulated) end))
	     ;; No #\$ or #\}, but we're nested/recursing?  Then
	     ;; we're missing a close brace.
	     (? (and nested? (not dollar-close))
		(raise (make-missing-close-violation end nested?)))
	     ;; (@var{dollar-close} is trueish now)
	     ;; Did we find a closing brace when nested?
	     ;; Then we're done
	     (? (and nested? dollar-close
		     (char=? #\} (string-ref text dollar-close)))
		(values (reverse accumulated) dollar-close))
	     ;; The character at @var{dollar-close} is a dollar now.
	     (! dollar dollar-close)
	     ;; Empty variable names are not allowed.
	     (? (= (+ 1 dollar) end)
		;; passing @var{kind} here would be incorrect!
		(raise (make-empty-variable-violation (+ 1 dollar) '$)))
	     (! next-character (string-ref text (+ 1 dollar)))
	     ;; Is this an braced variable expansion?
	     (? (char=? next-character #\{)
		(let^ ((! name-start (+ 2 dollar))
		       ;; Then search for a closing }
		       ;; or the : in ${VAR:-DEFAULT}.
		       (! name-end (string-index text cs::-or-close
						 name-start end))
		       ;; There should eventually be at least
		       ;; a closing }.
		       (? (not name-end)
			  (raise (make-missing-close-violation end '#{${}}#)))
		       (! name-end-character
			  (string-ref text name-end))
		       ;; Empty variable names are not allowed.
		       (? (= name-start name-end)
			  (raise (make-empty-variable-violation
				  name-end
				  (if (char=? name-end-character #\:)
				      '#{${:-}}#
				      '#{${}}#))))
		       ;; Was this ${NAME}?
		       (? (char=? name-end-character #\})
			  ;; Then add it to @var{accumulated} and
			  ;; continue.
			  (loop (cons (#{make-${}-position}# name-start name-end)
				      accumulated)
				(+ 1 name-end)))
		       ;; Otherwise, it was ${NAME:-VALUE}.
		       ;; But verify - exists.
		       (? (not (and (< (+ 1 name-end) end)
				    (char=? (string-ref text (+ 1 name-end))
					    #\-)))
			  ;; TODO a more specific condition would be nice.
			  (raise (%make-expansion-violation (+ 1 name-end))))
		       (! value-start (+ 2 name-end))
		       ;; Now parse VALUE in ${NAME:-VALUE}.
		       ;;
		       ;; This procedure call will verify a close
		       ;; brace at @var{default-end} exist.
		       (<-- (value-parts value-end)
			    (parse-expandable* text value-start end '#{${:-}}#))
		       ;; This was violated at some draft of this procedure.
		       ;; Verify it is fixed.
		       (!! (or (pair? value-parts)
			       (null? value-parts)))
		       ;; So the following should be true.
		       ;; (Not related to previous comment.)
		       (!! (char=? #\} (string-ref text value-end))))
		      ;; Add the variable expansion to @var{accumulated}
		      ;; and continue.
		      (loop (cons (#{make-${:-}-position}# name-start name-end
				   value-start value-end value-parts)
				  accumulated)
			    ;; + 1: eat the closing brace.
			    (+ 1 value-end))))
	     ;; Then it is an unbraced $VARIABLE expansion.
	     (! name-start (+ 1 dollar))
	     (! name-end (string-index text (if nested?
						cs:unbraced-end/nested
						cs:unbraced-end)
				       name-start end))
	     (! name-end (or name-end end))
	     ;; Empty variable names are not allowed.
	     (? (= name-start name-end)
		(raise (make-empty-variable-violation name-end '$))))
	    ;; Add the variable to @var{accumulated} and continue.
	    (loop (cons (make-$-position name-start name-end) accumulated)
		  name-end)))

    (define (parse-expandable text)
      (parse-expandable* text 0 (string-length text) #f))))

